9ea00614e257bfad64d6befd890772c1ca1b2394,modules/hadoop/src/test/java/org/gridgain/grid/kernal/processors/hadoop/GridHadoopMapReduceTest.java,GridHadoopMapReduceTest,testWholeMapReduceExecution,#,57
Before Change
 * @throws Exception If failed.
 */
public void testWholeMapReduceExecution() throws Exception {
    File testInputFile = File.createTempFile(GridHadoopWordCount2.class.getSimpleName(), "-input");

    testInputFile.deleteOnExit();

    generateTestFile(testInputFile, "red", 100000, "blue", 200000, "green", 150000, "yellow", 70000);

    File testOutputDir = Files.createTempDirectory("job-output").toFile();

    for (int i = 0; i < 16; i++) {
        boolean useNewMapper = (i & 1) == 0;
        boolean useNewCombiner = (i & 2) == 0;
        boolean useNewReducer = (i & 4) == 0;
        boolean useCustomSerializer = (i & 8) == 0;

        JobConf jobConf = new JobConf();

        if (useCustomSerializer)
            jobConf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, CustomSerialization.class.getName());

        // To split into about 40 items for v2.
        jobConf.setInt(FileInputFormat.SPLIT_MAXSIZE, 65000);

        // For v1.
        jobConf.setInt("fs.local.block.size", 65000);

        GridHadoopWordCount1.setTasksClasses(jobConf, !useNewMapper, !useNewCombiner, !useNewReducer);

        Job job = Job.getInstance(jobConf);

        GridHadoopWordCount2.setTasksClasses(job, useNewMapper, useNewCombiner, useNewReducer);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, new Path(testInputFile.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(testOutputDir.getAbsolutePath()));

        job.setJarByClass(GridHadoopWordCount2.class);
After Change
 * @throws Exception If failed.
 */
public void testWholeMapReduceExecution() throws Exception {
    GridGgfsPath inDir = new GridGgfsPath(PATH_INPUT);

    ggfs.mkdirs(inDir);

    GridGgfsPath inFile = new GridGgfsPath(inDir, GridHadoopWordCount2.class.getSimpleName() + "-input");

    generateTestFile(inFile.toString(), "red", 100000, "blue", 200000, "green", 150000, "yellow", 70000);

    for (int i = 0; i < 16; i++) {
        ggfs.delete(new GridGgfsPath(PATH_OUTPUT), true);

        boolean useNewMapper = (i & 1) == 0;
        boolean useNewCombiner = (i & 2) == 0;
        boolean useNewReducer = (i & 4) == 0;
        boolean useCustomSerializer = (i & 8) == 0;

        JobConf jobConf = new JobConf();

        if (useCustomSerializer)
            jobConf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, CustomSerialization.class.getName());

        // To split into about 40 items for v2.
        jobConf.setInt(FileInputFormat.SPLIT_MAXSIZE, 65000);

        // For v1.
        jobConf.setInt("fs.local.block.size", 65000);

        // File system coordinates.
        jobConf.set("fs.default.name", GGFS_SCHEME);
        jobConf.set("fs.ggfs.impl", "org.gridgain.grid.ggfs.hadoop.v1.GridGgfsHadoopFileSystem");
        jobConf.set("fs.AbstractFileSystem.ggfs.impl", "org.gridgain.grid.ggfs.hadoop.v2.GridGgfsHadoopFileSystem");

        GridHadoopWordCount1.setTasksClasses(jobConf, !useNewMapper, !useNewCombiner, !useNewReducer);

        Job job = Job.getInstance(jobConf);

        GridHadoopWordCount2.setTasksClasses(job, useNewMapper, useNewCombiner, useNewReducer);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, new Path(GGFS_SCHEME + inFile.toString()));
        FileOutputFormat.setOutputPath(job, new Path(GGFS_SCHEME + PATH_OUTPUT));

        job.setJarByClass(GridHadoopWordCount2.class);
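
For reference, a minimal sketch of what the "file system coordinates" settings above do on the client side: Hadoop resolves a URI scheme through the fs.<scheme>.impl key (and fs.AbstractFileSystem.<scheme>.impl for the v2 FileContext API), so a ggfs:// path ends up served by GridGgfsHadoopFileSystem. The ggfs://ipc/ authority below is a hypothetical placeholder for GGFS_SCHEME, and the lookup only succeeds when a GGFS endpoint is actually running; this is an illustration, not part of the test.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GgfsSchemeResolutionSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Hypothetical scheme/authority standing in for GGFS_SCHEME.
        conf.set("fs.default.name", "ggfs://ipc/");

        // Map the "ggfs" scheme to the old-API (v1) file system implementation,
        // exactly as the test's jobConf does.
        conf.set("fs.ggfs.impl", "org.gridgain.grid.ggfs.hadoop.v1.GridGgfsHadoopFileSystem");

        // FileSystem.get() now returns the GGFS-backed implementation
        // (requires a reachable GGFS endpoint).
        FileSystem fs = FileSystem.get(conf);

        System.out.println(fs.exists(new Path("/")));
    }
}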